
# generate table F4

# load packages
library(rio) # load data
library(tidyverse) # data manipulation
library(stargazer) # generate tables
library(ivreg) # estimate 2SLS
library(lmtest) # cluster standard errors
library(sandwich) # cluster standard errors

# set working directory; the relative data path below is resolved against it
setwd("~/replication_files/")

# load data for analysis and construct the two instruments.
# Each instrument is the product of a country-level mean and a year-level mean
# of the corresponding resource-mention measure. mean() is called without
# na.rm, so a missing value inside a group makes that group's mean NA.
data_with_dictionary <- import("data/full_data.csv") %>%
  # treat categorical variables as factors
  mutate(across(c(imf_program, field_discovery, grad_school_econ_usa, iso3c, year),
                as.factor)) %>%
  # country-level means of both measures
  group_by(iso3c) %>%
  mutate(across(c(resource_mentions_absolute, resource_mentions_tfidf),
                mean,
                .names = "country_specific_{.col}")) %>%
  # year-level means of both measures (group_by() replaces the country grouping)
  group_by(year) %>%
  mutate(across(c(resource_mentions_absolute, resource_mentions_tfidf),
                mean,
                .names = "year_specific_{.col}")) %>%
  ungroup() %>%
  # instrument = country-level mean x year-level mean
  mutate(
    instrument_absolute =
      country_specific_resource_mentions_absolute * year_specific_resource_mentions_absolute,
    instrument_tfidf =
      country_specific_resource_mentions_tfidf * year_specific_resource_mentions_tfidf
  )

# model 1, table F4
# First stage for the absolute term-frequency measure: regress the lagged
# resource-mention measure on its instrument, the control variables, and
# year and country (iso3c) factors.
first_stage1 <- lm(resource_mentions_absolute_lag ~ instrument_absolute + 
                     previous_policy +  grad_school_econ_usa + fdi_performance_index_lag +
                     imf_program + price_crudeoil_lag + price_crudeoil_difference + 
                     resource_rents_lag + log_gdp_per_capita_lag + gdp_growth_lag + field_discovery_lag + 
                     polyarchy + left_executive + protest + year + iso3c, data = data_with_dictionary)

# cluster standard errors by country
# NOTE: vcovHC() has no `cluster` argument for lm fits; a `cluster = ~ iso3c`
# passed to it is silently absorbed by `...` and the result is a plain
# heteroskedasticity-robust (unclustered) covariance matrix. vcovCL() is the
# sandwich estimator that actually clusters. type = "HC0" is carried over from
# the previous call; vcovCL()'s remaining defaults are left untouched.
first_stage1_r <- coeftest(
  first_stage1,
  vcov. = vcovCL(first_stage1, cluster = ~ iso3c, type = "HC0")
)

# model 2, table F4
# Same specification as model 1, using the TF-IDF measure and its instrument.
first_stage2 <- lm(resource_mentions_tfidf_lag ~ instrument_tfidf + 
                     previous_policy +  grad_school_econ_usa + fdi_performance_index_lag +
                     imf_program + price_crudeoil_lag + price_crudeoil_difference + 
                     resource_rents_lag + log_gdp_per_capita_lag + gdp_growth_lag + field_discovery_lag + 
                     polyarchy + left_executive + protest + year + iso3c, data = data_with_dictionary)

# cluster standard errors by country (vcovCL(), for the reason given above)
first_stage2_r <- coeftest(
  first_stage2,
  vcov. = vcovCL(first_stage2, cluster = ~ iso3c, type = "HC0")
)

# generate table F4 from the coeftest results of both first-stage models
# (optionally add type = "text" to the call)
stargazer(
  first_stage1_r, first_stage2_r,
  # drop every coefficient row whose name matches the country or year factors
  omit = c("iso3c", "year"),
  keep.stat = c("n", "rsq", "f"),
  dep.var.labels = c(
    "Natural Resource Term Frequency",
    "Natural Resource Term Frequency (TF--IDF)"
  ),
  # labels are listed in the order the regressors enter the two formulas
  covariate.labels = c(
    "Instrument: Natural Resource Term Frequency",
    "Instrument: Natural Resource Term Frequency (TF--IDF)",
    "Previous Policy Passage = 1",
    "Technocratic Finance Minister = 1",
    "FDI Performance Index",
    "IMF Program = 1",
    "Crude Oil Price, t--1",
    "Crude Oil Price, Delta",
    "Resource Rents, t--1",
    "Log GDP Per Capita, t--1",
    "GDP Growth, t--1",
    "Field Discovery = 1, t--1",
    "Polyarchy",
    "Left Executive",
    "Protest Count"
  )
)

# retrieve fit stats: same table layout, but built from the lm objects
# themselves rather than the coeftest results
# (optionally add type = "text" to the call)
stargazer(
  first_stage1, first_stage2,
  # drop every coefficient row whose name matches the country or year factors
  omit = c("iso3c", "year"),
  keep.stat = c("n", "rsq", "f"),
  dep.var.labels = c(
    "Natural Resource Term Frequency",
    "Natural Resource Term Frequency (TF--IDF)"
  ),
  # labels are listed in the order the regressors enter the two formulas
  covariate.labels = c(
    "Instrument: Natural Resource Term Frequency",
    "Instrument: Natural Resource Term Frequency (TF--IDF)",
    "Previous Policy Passage = 1",
    "Technocratic Finance Minister = 1",
    "FDI Performance Index",
    "IMF Program = 1",
    "Crude Oil Price, t--1",
    "Crude Oil Price, Delta",
    "Resource Rents, t--1",
    "Log GDP Per Capita, t--1",
    "GDP Growth, t--1",
    "Field Discovery = 1, t--1",
    "Polyarchy",
    "Left Executive",
    "Protest Count"
  )
)
